mlehotay

developer blog and motley assemblage of data science projects

Analysis of World of Warcraft PvP Leaderboards


I recently scraped the PvP leaderboards from the World of Warcraft website and wrote them out to a SQLite database. Let's plot some charts of the data and see if anything interesting turns up.

There are three leaderboards: the 2v2 Arena, the 3v3 Arena, and the 10x10 Battleground. Each leaderboard lists the top 1000 players by rating. (I think these are Elo ratings.) Anyway, I saved the leaderboards to the SQL database as three separate tables. I also scraped a minimal amount of data from the profile pages of each of the characters on the leaderboards and saved the profile data to a fourth table.

import sqlite3
import altair as alt
import pandas as pd
query = '''
SELECT *
FROM sqlite_master 
WHERE type='table'
'''
con = sqlite3.connect('data/wow.db')
pd.read_sql(query, con)
type name tbl_name rootpage sql
0 table arena_2v2 arena_2v2 2 CREATE TABLE "arena_2v2" (\n"rank" INTEGER,\n ...
1 table arena_3v3 arena_3v3 32 CREATE TABLE "arena_3v3" (\n"rank" INTEGER,\n ...
2 table battlegrounds battlegrounds 62 CREATE TABLE "battlegrounds" (\n"rank" INTEGER...
3 table players players 93 CREATE TABLE "players" (\n"name" TEXT,\n "tit...
query = '''
    SELECT name, rank, rating, wins, losses, achievement, ilvl
    FROM battlegrounds
    LEFT JOIN players
    ON battlegrounds.url = players.URL
'''
con = sqlite3.connect('data/wow.db')
df = pd.read_sql(query, con)
con.close()
df
name rank rating wins losses achievement ilvl
0 Crdefender 1 2163 81 4 18265.0 456.0
1 Lifeswaplol 1 2163 80 3 8945.0 462.0
2 Wolf 3 2153 77 4 18155.0 463.0
3 Intricate 4 2135 72 4 10530.0 455.0
4 Jøkes 5 2125 69 2 15550.0 459.0
... ... ... ... ... ... ... ...
995 Evileretta 995 1443 9 14 12470.0 452.0
996 Idoless 995 1443 10 12 13985.0 447.0
997 Vyaz 995 1443 8 3 18820.0 457.0
998 Xwarlord 995 1443 12 14 10505.0 447.0
999 Thedarklord 1000 1442 15 12 16195.0 437.0

1000 rows × 7 columns


df['num_matches'] =  df['wins'] + df['losses']
df['win_ratio'] = df['wins'] / df['num_matches']
alt.Chart(df).mark_point().encode(x='rank', y='rating')
alt.Chart(df).mark_point().encode(x='rank', y='ilvl')
alt.Chart(df).mark_point().encode(alt.X('rating:Q', scale=alt.Scale(zero=False)), 
                                  alt.Y('ilvl:Q', scale=alt.Scale(zero=False)))
alt.Chart(df).mark_point().encode(x='rank', y='achievement')
alt.Chart(df).mark_point().encode(x='rank', y='win_ratio')
alt.Chart(df).mark_point().encode(alt.X('win_ratio:Q', scale=alt.Scale(zero=False)), y='num_matches')
alt.Chart(df).mark_point().encode(alt.Y('win_ratio:Q', scale=alt.Scale(zero=False)), x='num_matches')
alt.Chart(df).mark_point().encode(x='num_matches', y='wins')
alt.Chart(df).mark_bar().encode(alt.X("rating:Q", bin=True), y='count()')
alt.Chart(df).mark_bar().encode(alt.X("rank:Q", bin=True), y='count()')
alt.Chart(df).mark_bar().encode(alt.X("ilvl:Q", bin=True), y='count()')
alt.Chart(df).mark_bar().encode(alt.X("achievement:Q", bin=True), y='count()')
alt.Chart(df).mark_circle().encode(
    alt.X(alt.repeat("column"), type='quantitative', scale=alt.Scale(zero=False)),
    alt.Y(alt.repeat("row"), type='quantitative', scale=alt.Scale(zero=False))
).properties(
    width=100,
    height=100
).repeat(
    row=['rank', 'rating', 'win_ratio', 'ilvl', 'achievement'],
    column=['rank', 'rating', 'win_ratio', 'ilvl', 'achievement']
)
query = '''
    SELECT *, '2v2' as board
    FROM arena_2v2
    UNION ALL
    SELECT *, '3v3' as board
    FROM arena_3v3
    UNION ALL
    SELECT *, 'battlegrounds' as board
    FROM battlegrounds
'''
con = sqlite3.connect('data/wow.db')
df_boards = pd.read_sql(query, con)
con.close()
df_boards
rank rating player class faction realm wins losses url board
0 1 2611 Dinoe Druid ALLIANCE Sargeras 209 25 https://worldofwarcraft.com/en-us/character/us... 2v2
1 2 2591 Thugonomiczz Warlock ALLIANCE Stormrage 95 9 https://worldofwarcraft.com/en-us/character/us... 2v2
2 3 2539 Drãke Monk ALLIANCE Stormrage 118 36 https://worldofwarcraft.com/en-us/character/us... 2v2
3 4 2514 Niarb Paladin ALLIANCE Laughing Skull 98 42 https://worldofwarcraft.com/en-us/character/us... 2v2
4 5 2499 Kubyzy Druid ALLIANCE Kel'Thuzad 115 21 https://worldofwarcraft.com/en-us/character/us... 2v2
... ... ... ... ... ... ... ... ... ... ...
2994 995 1443 Evileretta Warlock ALLIANCE Vashj 9 14 https://worldofwarcraft.com/en-us/character/us... battlegrounds
2995 995 1443 Idoless Priest HORDE Magtheridon 10 12 https://worldofwarcraft.com/en-us/character/us... battlegrounds
2996 995 1443 Vyaz Rogue ALLIANCE Stormrage 8 3 https://worldofwarcraft.com/en-us/character/us... battlegrounds
2997 995 1443 Xwarlord Priest HORDE Firetree 12 14 https://worldofwarcraft.com/en-us/character/us... battlegrounds
2998 1000 1442 Thedarklord Mage ALLIANCE Sargeras 15 12 https://worldofwarcraft.com/en-us/character/us... battlegrounds

2999 rows × 10 columns

alt.Chart(df_boards).mark_point().encode(x='rank', y='rating', color='board')
query = '''
SELECT board, rank, rating, name, title, realm, class, details,
    faction, wins, losses, achievement, ilvl, players.url
FROM (
    SELECT *, '2v2 arenas' as board
    FROM arena_2v2
    UNION
    SELECT *, '3v3 arenas' as board
    FROM arena_3v3
    UNION
    SELECT *, 'battlegrounds' as board
    FROM battlegrounds
) leaderboards
JOIN players
WHERE players.url = leaderboards.url
'''
con = sqlite3.connect('data/wow.db')
df_boards = pd.read_sql(query, con)
con.close()
df_boards
board rank rating name title realm class details faction wins losses achievement ilvl url
0 2v2 arenas 1 2611 Dinoe Notorious Gladiator Sargeras Druid 120 Night Elf Restoration Druid ALLIANCE 209 25 22180 465 https://worldofwarcraft.com/en-us/character/us...
1 3v3 arenas 113 2155 Dinoe Notorious Gladiator Sargeras Druid 120 Night Elf Restoration Druid ALLIANCE 141 50 22180 465 https://worldofwarcraft.com/en-us/character/us...
2 2v2 arenas 2 2591 Thugonomiczz Wrathful Gladiator Stormrage Warlock 120 Dwarf Destruction Warlock ALLIANCE 95 9 15320 465 https://worldofwarcraft.com/en-us/character/us...
3 2v2 arenas 3 2539 Drãke Dread Gladiator Stormrage Monk 120 Human Windwalker Monk ALLIANCE 118 36 10075 469 https://worldofwarcraft.com/en-us/character/us...
4 3v3 arenas 12 2395 Drãke Dread Gladiator Stormrage Monk 120 Human Windwalker Monk ALLIANCE 202 83 10075 469 https://worldofwarcraft.com/en-us/character/us...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2965 battlegrounds 995 1443 Xwarlord High Warlord Firetree Priest 120 Blood Elf Holy Priest HORDE 12 14 10505 447 https://worldofwarcraft.com/en-us/character/us...
2966 battlegrounds 995 1443 Aviana the Insane Doomhammer Priest 120 Night Elf Holy Priest ALLIANCE 12 14 28380 457 https://worldofwarcraft.com/en-us/character/us...
2967 battlegrounds 995 1443 Idoless Warlord Magtheridon Priest 120 Blood Elf Discipline Priest HORDE 10 12 13985 447 https://worldofwarcraft.com/en-us/character/us...
2968 battlegrounds 995 1443 Evileretta Justicar Vashj Warlock 120 Human Destruction Warlock ALLIANCE 9 14 12470 452 https://worldofwarcraft.com/en-us/character/us...
2969 battlegrounds 1000 1442 Thedarklord Vanquisher Sargeras Mage 120 Human Frost Mage ALLIANCE 15 12 16195 437 https://worldofwarcraft.com/en-us/character/us...

2970 rows × 14 columns

alt.Chart(df_boards).mark_point().encode(x='rank', y='rating', color='board')
alt.Chart(df_boards).mark_point().encode(alt.X('rank:Q', scale=alt.Scale(zero=False)), 
                                  alt.Y('rating:Q', scale=alt.Scale(zero=False)),
                                 color='board')
alt.Chart(df_boards).mark_point(opacity=0.75).encode(alt.X('rank:Q', scale=alt.Scale(zero=False)), 
                                  alt.Y('ilvl:Q', scale=alt.Scale(zero=False)),
                                 color='board')
alt.Chart(df_boards).mark_point(opacity=0.75, clip=True).encode(alt.X('rank:Q', scale=alt.Scale(zero=False)), 
                                  alt.Y('ilvl:Q', scale=alt.Scale(domain=(420, 480))), color='board')
alt.Chart(df_boards).mark_point(opacity=0.75, clip=True).encode(alt.X('rank:Q', scale=alt.Scale(zero=False)), 
                                  alt.Y('ilvl:Q', scale=alt.Scale(domain=(420, 480))),
                                 color='class')
df_boards['num_matches'] =  df_boards['wins'] + df_boards['losses']
df_boards['win_ratio'] = df_boards['wins'] / df_boards['num_matches']
alt.Chart(df_boards).mark_circle().encode(alt.X('rank:Q', scale=alt.Scale(zero=False)), 
                                  alt.Y('win_ratio:Q', scale=alt.Scale(zero=False)), color='class')
alt.Chart(df_boards).mark_circle().encode(alt.X('rank:Q', scale=alt.Scale(zero=False)), 
                                  alt.Y('win_ratio:Q', scale=alt.Scale(zero=False)),
                                 color='faction')
alt.Chart(df_boards).mark_bar().encode(x='class', y='count()')
alt.Chart(df_boards).mark_bar().encode(
    x='class',
    y='count()',
    color='faction'
)
alt.Chart(df_boards).mark_bar().encode(
    x='faction',
    y='win_ratio',
)
bar = alt.Chart(df_boards).mark_bar().encode(
    x='class:O',
    y='mean(rating):Q'
)
rule = alt.Chart(df_boards).mark_rule(color='red').encode(
    y='mean(rating):Q'
)
(bar + rule).properties(width=600)
bar = alt.Chart(df_boards).mark_bar().encode(
    alt.Y('mean(rating)', scale=alt.Scale(zero=False)),
    x='class'
)
rule = alt.Chart(df_boards).mark_rule(color='red').encode(
    y='mean(rating)'
)
(bar + rule).properties(width=500)
df_boards['rank2'] = 1000 - df_boards['rank']
alt.Chart(df_boards).mark_bar().encode(
    alt.Y('mean(rank2)', scale=alt.Scale(zero=False)),
    x='class',
    column='board'
)
alt.Chart(df_boards).mark_bar().encode(
    alt.X('class'),
    alt.Y('mean(rank2)', scale=alt.Scale(zero=False)),
    alt.Color('board'),
    alt.Column('board')
)
bar = alt.Chart(df_boards).mark_bar().encode(
    alt.X('mean(rank2)', scale=alt.Scale(zero=False)),
    alt.Y('details', sort='-x'),
)
rule = alt.Chart(df_boards).mark_rule(color='red').encode(
    x='mean(rank2)'
)
(bar + rule)
alt.Chart(df_boards).transform_fold(
    ['2v2 arenas', '3v3 arenas', 'battlegrounds'],
    as_=['Leaderboard', '# Characters']
).mark_area(
    opacity=0.5,
    interpolate='step'
).encode(
    alt.X('rating:Q', bin=alt.Bin(maxbins=100)),
    alt.Y('count()', stack=None),
    alt.Color('board:N')
)